library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(plotly)
library(leaflet)
# Session-wide tigris settings:
#   tigris_class     - ask tigris for "sf" output.
#   tigris_use_cache - keep downloaded geographies in a local cache.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
options(
  tigris_class = "sf",
  tigris_use_cache = TRUE # This stores tigris loads somewhere on your machine for much faster personal loading.
)
# This script uses a social distancing dataset from Safegraph to track social distancing compliance in San Jose.
# Names of the nine Bay Area counties, used below to look up and filter
# county-level geographies.
bay_county_names <- c(
  "Alameda", "Contra Costa", "Marin",
  "Napa", "San Francisco", "San Mateo",
  "Santa Clara", "Solano", "Sonoma"
)
# GEOIDs of every census block group in the Bay Area counties.
# For each county name: load that county's block groups, keep only the GEOID
# column, then flatten the per-county results into a single vector.
bay_blockgroups <-
  bay_county_names %>%
  map(function(county_name) {
    block_groups("CA", county_name, progress_bar = FALSE) %>%
      pull(GEOID)
  }) %>%
  unlist()
# California county geometries, restricted to the counties listed in
# bay_county_names.
bay_counties <-
  counties("CA", cb = FALSE, progress_bar = FALSE) %>%
  filter(NAME %in% bay_county_names)
# Block group geometries for Santa Clara County. Its CRS is the reference CRS
# that the San Jose shapefiles below are transformed to.
scc_blockgroups <-
  block_groups("CA", "Santa Clara", cb = FALSE, progress_bar = FALSE)
# Below uses tracts sent to us by San Jose.
# st_read() already returns an sf object, so no st_as_sf() conversion is needed.
# The layer is reprojected to the CRS of scc_blockgroups so the two can be
# spatially joined later.
sj_tracts <-
  st_read("/users/ctenner/Pcloud Drive/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp") %>%
  st_transform(st_crs(scc_blockgroups))
# San Jose city council district polygons, read from the shared shapefile and
# reprojected to the CRS of scc_blockgroups.
# st_read() already returns an sf object, so no st_as_sf() conversion is needed.
# NOTE(review): the variable name is misspelled ("disticts"); it is kept as-is
# because later code refers to it by this name.
sj_citycouncil_disticts <-
  st_read("/users/ctenner/Pcloud Drive/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp") %>%
  st_transform(st_crs(scc_blockgroups))
# Santa Clara County block groups that belong to San Jose, each tagged with its
# city council district.
sj_blockgroups <-
  scc_blockgroups %>%
  # Represent each block group by its centroid for the spatial joins.
  st_centroid() %>%
  # Inner join: keep only block groups whose centroid falls in a San Jose tract.
  st_join(sj_tracts, left = FALSE) %>%
  # Left join: attach the council district containing the centroid (NA if none).
  st_join(sj_citycouncil_disticts %>% dplyr::select(DISTRICTS)) %>%
  mutate(
    # Fixed level order "1".."10" so districts sort numerically, not as text.
    DISTRICTS = factor(DISTRICTS, levels = as.character(1:10))
  ) %>%
  # Drop the centroid points and restore the original block group polygons.
  st_set_geometry(NULL) %>%
  left_join(scc_blockgroups %>% dplyr::select(GEOID), by = "GEOID") %>%
  st_as_sf() %>%
  dplyr::select(GEOID, DISTRICTS)
# The spatial join leaves off two block groups which are touching district 9.
# Assign every block group with a missing district to district 9. DISTRICTS is
# a factor, so the replacement is given as the level label "9".
sj_blockgroups$DISTRICTS <- replace(
  sj_blockgroups$DISTRICTS,
  is.na(sj_blockgroups$DISTRICTS),
  "9"
)
# Boundary of San Jose, taken from the California places layer by exact name.
sj_boundary <-
  places("CA", cb = FALSE, progress_bar = FALSE) %>%
  filter(NAME == "San Jose")
#sj_blockgroups <-
# scc_blockgroups %>%
# dplyr::select(GEOID) %>%
# st_join(sj_boundary %>% dplyr::select(geometry), left = F) #%>%
# st_set_geometry(NULL) %>%
# left_join(scc_blockgroups %>% dplyr::select(GEOID)) %>%
# st_as_sf() # last lines here not necessary because we didn't convert to centroid. thanks for the catch, cameron!
# Visual check: block groups coloured by council district, with the San Jose
# boundary drawn on top as a thick red outline and no fill.
# The argument is spelled out as alpha.regions rather than relying on partial
# matching of "alpha.region".
mapview(sj_blockgroups, zcol = "DISTRICTS") +
  mapview(sj_boundary, alpha.regions = 0, color = "red", lwd = 4)
# Note on the commented-out alternative above (the active code does convert to centroids): in that version, unlike the R Basics example, I don't convert to centroid before the `st_join()`, because there are random holes within SJ official geopolitical boundaries (whole different story) that certainly shouldn't be removed from our analysis. So the `st_join()` holds onto anything that even touches the SJ boundary. But note there's a huge rural block group to the east of SJ that comes along for the ride that's practically the size of SJ, which you may decide to manually remove for the sake of better visualization for now.
# sj_blockgroups <-
# sj_blockgroups %>%
# filter(!GEOID %in% c("060855135001"))
#
# mapview(sj_blockgroups)
# Loading social distancing data
# Loading Bay social distancing data will still take some time, but much faster than loading the whole US. We'll quickly create a SJ version too. When you open this file, you'll probably see that the first few steps have been commented out to reduce knitting time, but you should practice uncommenting and running all to understand how it works.
#
# Now's a good time to also remind you that per your data sharing agreement, raw Safegraph data like this should definitely stay within the F Drive Restricted Data Library. You'll see at the end the level of aggregation that is then OK to save outside of the F Drive. The gray area in-between will come down to teaching team judgment.
#
# As you start to save RDS files, be very careful about overwriting existing files. We're going to be backing things up a lot, but just be aware and definitely notify us if you think something has gone wrong.
# bay_socialdistancing <-
# readRDS("P:/SFBI/Restricted Data Library/Safegraph/covid19analysis/bay_socialdistancing.rds")
#
# sj_socialdistancing <-
# bay_socialdistancing %>%
# filter(origin_census_block_group %in% sj_blockgroups$GEOID)
#
# saveRDS(sj_socialdistancing, file = "P:/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_socialdistancing.rds")
# Load the San Jose subset of the social distancing data from its saved RDS
# file (the commented-out code above shows how that file was produced).
sj_socialdistancing <-
  readRDS(
    "/users/ctenner/Pcloud Drive/SFBI/Restricted Data Library/Safegraph/covid19analysis/sj_socialdistancing.rds"
  )
# We will eventually join the Safegraph data to our geographies, but right now since the data includes many rows for each block group (individual days), this is not a good time to do it.